/**
 * 2017年5月17日
 */
package cn.edu.bjtu.tokenization;

import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.ansj.domain.Result;
import org.ansj.domain.Term;
import org.ansj.splitWord.analysis.ToAnalysis;
import org.deeplearning4j.text.tokenization.tokenizer.TokenPreProcess;
import org.deeplearning4j.text.tokenization.tokenizer.Tokenizer;

/**
 * Standard word segmentation, keeping verbs, adverbs and other word classes
 * (i.e. no part-of-speech filtering beyond the inherited filter).
 * @author Alex
 *
 */
public class AnsjTokenizer extends BaseFilterTokenizer implements Tokenizer{
	
	// Underlying Ansj analyzer; replaced by a stream-backed instance in the InputStream ctor.
	ToAnalysis ta = new ToAnalysis();
	// Iterator over the segmented terms; null means parsing failed (or not yet initialized).
	Iterator<Term> iter = null;
	// Token produced by the most recent hasMoreTokens() call; served by nextToken().
	String token = null;

	/**
	 * Tokenizes the contents of the given stream.
	 * NOTE(review): the InputStreamReader uses the platform default charset —
	 * confirm callers supply text in that encoding, or consider passing an
	 * explicit charset (e.g. UTF-8) in a follow-up change.
	 *
	 * @param in stream of text to segment
	 */
	public AnsjTokenizer(InputStream in) {
		ta = new ToAnalysis(new InputStreamReader(in));
		try {
			iter = ta.parse().iterator();
		} catch (IOException e) {
			// Best-effort: a failed parse leaves iter null, so hasMoreTokens()
			// simply reports false instead of propagating the error.
			e.printStackTrace();
			iter = null;
		}
	}

	/**
	 * Tokenizes the given string.
	 *
	 * @param s text to segment
	 */
	public AnsjTokenizer(String s){
		Result result = ta.parseStr(s);
		iter = result.iterator();
	}

	/**
	 * Advances to the next non-empty token, applying the inherited filter,
	 * and stores it in {@link #token} for retrieval by {@link #nextToken()}.
	 *
	 * @return true if a usable (non-null, non-empty) token was found
	 */
	private boolean has0(){
		if(!iter.hasNext())return false;
		token = null;
		while(token==null && iter.hasNext()){
			token = iter.next().getName();
			token = filter(token);
			// FIX: the null check must come first — the original evaluated
			// token.equals("") before token == null, which would throw an NPE
			// whenever filter(...) returned null.
			if(token == null || token.isEmpty()){
				token = null;
			}
		}
		return token != null;
	}

	/**
	 * Returns whether another token is available.
	 * CAUTION: this call ADVANCES the underlying iterator — each invocation
	 * consumes input, so callers must pair every hasMoreTokens() with exactly
	 * one nextToken(), as {@link #getTokens()} does.
	 */
	public boolean hasMoreTokens() {
		if(iter == null) return false;
		return has0();
	}

	/**
	 * {@inheritDoc}
	 * Always returns 1: the total token count is not known up front for a
	 * streaming iterator, so this class does not implement a real count.
	 */
	@Override
	public int countTokens() {
		return 1;
	}

	/** Returns the token found by the most recent hasMoreTokens() call (may be null before the first call). */
	@Override
	public String nextToken() {
		return token;
	}

	/** Drains the tokenizer, collecting every non-empty filtered token in order. */
	@Override
	public List<String> getTokens() {
		List<String> tokens = new ArrayList<>();
		while (hasMoreTokens()) {
			// has0() guarantees the stored token is non-null and non-empty here.
			String token = nextToken();
			if(!token.isEmpty()){
				tokens.add(token);
			}
		}
		return tokens;
	}

	@Override
	public void setTokenPreProcessor(TokenPreProcess tokenPreProcessor) {
		this.tokenPreProcess = tokenPreProcessor;
	}

}
